Mexico analysis¶
Load libraries¶
import warnings
from functools import partial
import covid_analysis.utils.paths as path
import janitor
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_flavor as pf
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import prince
import seaborn as sns
from plotly.offline import init_notebook_mode
Set defaults for plots¶
# matplotlib
# The bundled seaborn styles were renamed to "seaborn-v0_8-*" in matplotlib 3.6
# and the old names were later removed, so use whichever name this install has.
whitegrid_style = "seaborn-whitegrid"
if whitegrid_style not in plt.style.available:
    whitegrid_style = "seaborn-v0_8-whitegrid"
plt.style.use(whitegrid_style)
plt.rcParams["figure.figsize"] = (10, 8)
# seaborn
sns.set_style("whitegrid")
# plotly
init_notebook_mode()
pio.templates.default = "plotly_white"
pd.options.plotting.backend = "plotly"
# Silence warnings raised while plotting; note that this filter hides every
# warning category for the rest of the session, not only plotting ones.
warnings.filterwarnings("ignore")
Load data¶
# Location of the processed file with the positive COVID cases.
covid_mex_file = path.data_processed_dir("positive_covid_mex.csv")
date_columns = ["date_admission", "date_symptoms", "date_death"]
covid_mex_df = pd.read_csv(filepath_or_buffer=covid_mex_file)
# Convert the date columns after loading rather than through read_csv's
# `date_parser` argument, which is deprecated since pandas 2.0.
# errors="coerce" turns unparseable entries into NaT instead of raising.
covid_mex_df[date_columns] = covid_mex_df[date_columns].apply(
    pd.to_datetime, errors="coerce"
)
covid_mex_df.head(1)
| origin | sector | state | sex | patient_type | date_admission | date_symptoms | date_death | intubated | pneumonia | ... | epoc | asthma | immunosuppressed | hypertension | other_comorbidity | cardiovascular | obesity | chronic_kidney | smoking | icu | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Out of USMER | SSA | CIUDAD DE MÉXICO | Man | Ambulatory | 2020-10-16 | 2020-10-16 | NaT | Does not apply | No | ... | No | No | No | No | No | No | No | No | No | Does not apply |
1 rows × 23 columns
Calculate new data¶
Death column indicator¶
# Label each row by outcome: rows without a death date become "Survived",
# every other row becomes "Death".
covid_mex_df["survived"] = np.where(
    covid_mex_df["date_death"].isna(), "Survived", "Death"
)
Binned ages¶
# Bin edges every 4 years: 0, 4, 8, ..., 124.
bins = list(range(0, 128, 4))
# One "<lower>-<upper>" label per pair of consecutive edges.
labels = [f"{lower}-{upper}" for lower, upper in zip(bins, bins[1:])]
# NOTE(review): pd.cut closes intervals on the right by default, so an age equal
# to a shared edge (e.g. 4) falls in the lower range ("0-4") even though the
# next label ("4-8") also mentions it.
covid_mex_df["age_range"] = pd.cut(
    covid_mex_df["age"],
    bins=bins,
    labels=labels,
    include_lowest=True,
)
# Descending sort on the outcome label puts "Survived" rows before "Death" rows.
covid_mex_df = covid_mex_df.sort_values(by="survived", ascending=False)
Covid involvement by age and sex¶
# Sunburst of case counts: outcome on the inner ring, sex on the outer ring.
px.sunburst(
    data_frame=(
        covid_mex_df
        .groupby(["survived", "sex"])
        .size()
        .reset_index(name="n")
    ),
    path=["survived", "sex"],
    values="n",
    labels={"sex": "Sex", "n": "Count"},
    hover_name="survived",
    # The outcome is already shown as the hover title, so hide the duplicate row.
    hover_data={"survived": False},
)
# Confirmed cases per age range, coloured by outcome, with one facet row per sex.
(
    px.bar(
        data_frame=(
            covid_mex_df
            .groupby(["sex", "age_range", "survived"])
            .size()
            .reset_index(name="count")
        ),
        x="age_range",
        y="count",
        color="survived",
        facet_row="sex",
        labels={
            "age_range": "Age",
            "count": "Confirmed cases",
            "survived": "Status",
        },
        category_orders={"survived": ["Survived", "Death"]},
        log_y=True,
    )
    # Facet titles read "sex=<value>"; keep only the value part.
    .for_each_annotation(
        lambda annotation: annotation.update(text=annotation.text.split("=")[1])
    )
    .update_xaxes(tickangle=315)
    .update_traces(hovertemplate='Count: %{y} <br> Age: %{x}')
    .update_layout(
        # Horizontal legend anchored just above the top-right of the plot area.
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        },
        title_x=0.5,
    )
)
Time from infection to death¶
# Distribution of the number of days between first symptoms and death, by sex.
(
    px.histogram(
        data_frame=(
            covid_mex_df
            .query("survived == 'Death'")
            .assign(
                life_time=lambda deaths: (
                    deaths["date_death"] - deaths["date_symptoms"]
                ).dt.days
            )
            # Keep only spans strictly between 0 and 100 days.
            .query("life_time > 0 and life_time < 100")
        ),
        x="life_time",
        color="sex",
        marginal="box",
        labels={
            "life_time": "Number of days",
            "count": "Count",
            "sex": "Sex",
        },
    )
    .update_traces(hovertemplate='Count: %{y} <br> Time before dying: %{x}')
    .update_layout(
        # Horizontal legend anchored just above the top-right of the plot area.
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        }
    )
)
# Deaths per age range, one facet row per ICU status, coloured by the mean
# number of days between first symptoms and death.
(
    px.bar(
        data_frame=(
            covid_mex_df
            .query("survived == 'Death'")
            .assign(
                life_time=lambda deaths: (
                    deaths["date_death"] - deaths["date_symptoms"]
                ).dt.days
            )
            .query("life_time > 0")
            # Drop rows whose ICU status is anything other than Yes/No.
            .query("icu in ['Yes', 'No']")
            .loc[:, ["age_range", "icu", "life_time"]]
            .groupby(["age_range", "icu"])["life_time"]
            .describe()
            .loc[:, ["count", "mean"]]
            .reset_index()
        ),
        x="age_range",
        y="count",
        color="mean",
        facet_row="icu",
        labels={
            "age_range": "Age range",
            "count": "Count",
            "mean": "Mean lifetime",
            "icu": "ICU",
        },
        barmode="group",
    )
    # Give each facet its own y-axis scale and keep its tick labels visible.
    .update_yaxes(matches=None, showticklabels=True)
    .update_xaxes(tickangle=315)
    .update_layout(
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        }
    )
)
Number of deaths per state and where they happened¶
# Treemap of deaths, nested by state, origin, sector and patient type.
px.treemap(
    data_frame=(
        covid_mex_df
        # The chart reports deaths, so restrict to deceased patients first;
        # without this filter the counts would cover every positive case.
        .query("survived == 'Death'")
        .groupby(["state", "origin", "sector", "patient_type"])
        .size()
        .reset_index(name="n")
    ),
    # px.Constant("All") adds a single root node above the states.
    path=[px.Constant("All"), "state", "origin", "sector", "patient_type"],
    values="n",
    labels={"n": "Number of deaths"},
)
Patient conditions for survival¶
# Columns selected as input for the multiple correspondence analysis.
yes_or_no = [
    "sex",
    "intubated",
    "pneumonia",
    "pregnancy",
    "diabetes",
    "epoc",
    "asthma",
    "immunosuppressed",
    "hypertension",
    "other_comorbidity",
    "cardiovascular",
    "obesity",
    "chronic_kidney",
    "smoking",
    "icu",
    "survived",
]
def _split_category_label(label, variables):
    """Split a one-hot label ``<variable>_<value>`` into ``(variable, value)``.

    The variable is matched against the known names in ``variables`` so that
    names containing underscores (e.g. ``other_comorbidity``) stay intact.
    Labels that match no known variable are split at their first underscore.
    """
    candidates = [name for name in variables if label.startswith(f"{name}_")]
    if candidates:
        # Prefer the longest match in case one variable name prefixes another.
        variable = max(candidates, key=len)
        return variable, label[len(variable) + 1:]
    variable, _, value = label.partition("_")
    return variable, value


def iplot_coordinates(
    mca,
    X,
    x_component=1,
    y_component=2,
    show_row_points=False,
    show_column_points=True,
    highlight=None,
    text_size=10
):
    """Build an interactive scatter plot of MCA coordinates.

    Args:
        mca: Fitted MCA object providing ``row_coordinates``,
            ``column_coordinates`` and ``explained_inertia_``.
        X: Data frame the coordinates are computed for.
        x_component: 1-based number of the component drawn on the x-axis.
        y_component: 1-based number of the component drawn on the y-axis.
        show_row_points: If true, plot one point per row of ``X``; otherwise
            plot one point per category of each column.
        show_column_points: Accepted for interface compatibility; it is not
            consulted, column points are drawn whenever ``show_row_points``
            is false.
        highlight: Optional column of ``X`` used to colour the row points.
        text_size: Font size applied to the figure.

    Returns:
        The plotly figure, with reference lines through x=0 and y=0.
    """
    # The public parameters count components from 1, while the coordinate
    # frames and ``explained_inertia_`` are indexed from 0.
    x, y = x_component - 1, y_component - 1
    color, text = None, None

    if show_row_points:
        # Copy so that adding the highlight column never writes into a view.
        df = mca.row_coordinates(X)[[x, y]].copy()
        if highlight:
            df[highlight] = X[highlight]
            color = highlight
    else:
        coords = mca.column_coordinates(X)[[x, y]]
        full_labels = [str(label) for label in coords.index]
        variables = [str(column) for column in X.columns]
        parts = [_split_category_label(label, variables) for label in full_labels]
        df = coords.reset_index(drop=True).assign(
            full_text=full_labels,
            Category=[variable for variable, _ in parts],
            Values=[value for _, value in parts],
        )
        color = "Category"
        text = "Values"

    df = df.rename(columns={x: "x", y: "y"})

    fig = px.scatter(
        data_frame=df,
        x="x",
        y="y",
        color=color,
        text=text,
        labels={
            "x": "Component {} ({:.2f}% intertia)".format(
                x_component, mca.explained_inertia_[x] * 100
            ),
            "y": "Component {} ({:.2f}% intertia)".format(
                y_component, mca.explained_inertia_[y] * 100
            ),
        }
    )
    fig.update_traces(textposition="top right")
    fig.update_layout(
        font=dict(size=text_size),
        shapes=[
            # Vertical reference line at x = 0 spanning the full plot height.
            dict(
                type="line",
                yref="paper", y0=0, y1=1,
                xref="x", x0=0, x1=0
            ),
            # Horizontal reference line at y = 0 spanning the full plot width.
            dict(
                type="line",
                yref="y", y0=0, y1=0,
                xref="paper", x0=0, x1=1
            ),
        ]
    )
    return fig
# Input for the correspondence analysis: only the columns listed in yes_or_no.
X = covid_mex_df.loc[:, yes_or_no]
# Fit a 4-component MCA with a fixed random_state.
mca = prince.MCA(n_components=4, random_state=42).fit(X)
mca
MCA(n_components=4, random_state=42)
import functools

# Components to draw on the x and y axes.
x_component = 1
y_component = 2
# Pre-bind the fitted model, its data and the chosen components so that later
# calls only need to pass the display options.
plot_mca = functools.partial(
    iplot_coordinates,
    mca=mca,
    X=X,
    x_component=x_component,
    y_component=y_component,
)
plot_mca(show_row_points=False)
# Parallel-categories diagram of selected conditions among deceased patients.
(
    px.parallel_categories(
        data_frame=covid_mex_df.query("survived == 'Death'"),
        dimensions=[
            "icu",
            "chronic_kidney",
            "intubated",
            "pneumonia",
            "epoc",
            "cardiovascular",
        ],
        labels={
            "icu": "ICU",
            "chronic_kidney": "Chronic kidney",
            "intubated": "Intubated",
            "pneumonia": "Pneumonia",
            "epoc": "EPOC",
            "cardiovascular": "Cardiovascular",
        },
    )
    .update_traces(hoveron="color", hoverinfo="count+probability")
)